bitkeeper revision 1.1713.1.13 (42b16610ojQcHPRC6Ao_1CKXClza9A)
author sos22@douglas.cl.cam.ac.uk <sos22@douglas.cl.cam.ac.uk>
Thu, 16 Jun 2005 11:44:16 +0000 (11:44 +0000)
committer sos22@douglas.cl.cam.ac.uk <sos22@douglas.cl.cam.ac.uk>
Thu, 16 Jun 2005 11:44:16 +0000 (11:44 +0000)
Slightly disgusting hack to avoid using lots of lock instructions on a uniprocessor
machine just because we happened to compile with CONFIG_SMP.  Essentially, we
make a big table of all of the instruction sequences which differ in ``easy''
ways between UP and SMP kernels, and then select which one to use at run time.

Signed-off-by: Steven Smith <sos22@cam.ac.uk>
.rootkeys
linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h
patches/linux-2.6.11/smp-alts.patch [new file with mode: 0644]

index d53afebc33967602023032a294765e782601a8bb..98804345abf1e1fed08d9507eb66b49f76d1d6c8 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 428359d4b3fDYtazwXi4UUmSWaOUew patches/linux-2.6.11/linux-2.6.11.12.patch
 4296fb998LGSWCcljGKbOCUv3h9uRQ patches/linux-2.6.11/net-csum.patch
 429ae875I9ZrqrRDjGD34IC2kzDREw patches/linux-2.6.11/rcu-nohz.patch
+42b165fcilFTNezi9NIsG2ecLZVU0w patches/linux-2.6.11/smp-alts.patch
 429ba3007184K-y6WHQ6KgY65-lEIQ patches/linux-2.6.11/udp-frag.patch
 424f001e_M1Tnxc52rDrmCLelnDWMQ patches/linux-2.6.11/x86_64-linux.patch
 3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
index d1ffcb05d1bea97f501d8e20ecea0bcf14b0d001..dec06cdfd1ec101ad18e07b0455c62eee6418202 100644 (file)
@@ -372,6 +372,19 @@ config SMP
 
          If you don't know what to do here, say N.
 
+config SMP_ALTERNATIVES
+        bool "SMP alternatives support (EXPERIMENTAL)"
+        depends on SMP && EXPERIMENTAL
+        help
+          Try to reduce the overhead of running an SMP kernel on a uniprocessor
+          host slightly by replacing certain key instruction sequences
+          according to whether we currently have more than one CPU available.
+          This should provide a noticeable boost to performance when
+          running SMP kernels on UP machines, and have negligible impact
+          when running on a true SMP host.
+
+          If unsure, say N.
+
 config NR_CPUS
        int "Maximum number of CPUs (2-255)"
        range 2 255
index 3f2fef4a94e87c6ed70656e5c42fad35ecae552b..6973df049f1c696403cbc8e2235c345ec49fb870 100644 (file)
@@ -42,6 +42,7 @@ c-obj-$(CONFIG_ACPI_SRAT)     += srat.o
 c-obj-$(CONFIG_HPET_TIMER)     += time_hpet.o
 c-obj-$(CONFIG_EFI)            += efi.o efi_stub.o
 c-obj-$(CONFIG_EARLY_PRINTK)   += early_printk.o
+c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
 
 EXTRA_AFLAGS   := -traditional
 
index a7160953762a618153048a51b34ebbc82d7f4d27..1956f9ceed5fa7ce1390a29cee4720fa652aa0f9 100644 (file)
@@ -54,6 +54,8 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 
+#include <asm/smp_alt.h>
+
 #ifndef CONFIG_X86_IO_APIC
 #define Dprintk(args...)
 #endif
@@ -1186,6 +1188,10 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
                if (max_cpus <= cpucount+1)
                        continue;
 
+#ifdef CONFIG_SMP_ALTERNATIVES
+               if (kicked == 1)
+                       prepare_for_smp();
+#endif
                if (do_boot_cpu(cpu))
                        printk("CPU #%d not responding - cannot use it.\n",
                                                                cpu);
@@ -1301,6 +1307,11 @@ void __devinit smp_prepare_boot_cpu(void)
 /* must be called with the cpucontrol mutex held */
 static int __devinit cpu_enable(unsigned int cpu)
 {
+#ifdef CONFIG_SMP_ALTERNATIVES
+       if (num_online_cpus() == 1)
+               prepare_for_smp();
+#endif
+
        /* get the target out of its holding state */
        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
        wmb();
@@ -1340,6 +1351,12 @@ int __cpu_disable(void)
        fixup_irqs(map);
        /* It's now safe to remove this processor from the online map */
        cpu_clear(cpu, cpu_online_map);
+
+#ifdef CONFIG_SMP_ALTERNATIVES
+       if (num_online_cpus() == 1)
+               unprepare_for_smp();
+#endif
+
        return 0;
 }
 
index 021acbd1593b06bc797edc0debfc41cef7e3284d..fcf995952662fe7078be7ef49e8bc1d66ab52e0d 100644 (file)
@@ -8,6 +8,7 @@
 #include <asm/segment.h>
 #include <asm/cpufeature.h>
 #include <asm-xen/hypervisor.h>
+#include <asm/smp_alt.h>
 
 #ifdef __KERNEL__
 
@@ -251,19 +252,19 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
        unsigned long prev;
        switch (size) {
        case 1:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+               __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
                                     : "=a"(prev)
                                     : "q"(new), "m"(*__xg(ptr)), "0"(old)
                                     : "memory");
                return prev;
        case 2:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+               __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
                                     : "=a"(prev)
                                     : "q"(new), "m"(*__xg(ptr)), "0"(old)
                                     : "memory");
                return prev;
        case 4:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+               __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
                                     : "=a"(prev)
                                     : "q"(new), "m"(*__xg(ptr)), "0"(old)
                                     : "memory");
@@ -427,11 +428,55 @@ struct alt_instr {
 #endif
 
 #ifdef CONFIG_SMP
-#define smp_mb()       mb()
-#define smp_rmb()      rmb()
 #define smp_wmb()      wmb()
-#define smp_read_barrier_depends()     read_barrier_depends()
+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
+#define smp_alt_mb(instr)                                           \
+__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
+                    ".section __smp_alternatives,\"a\"\n"          \
+                    ".long 6667b\n"                                \
+                     ".long 6673f\n"                                \
+                    ".previous\n"                                  \
+                    ".section __smp_replacements,\"a\"\n"          \
+                    "6673:.byte 6668b-6667b\n"                     \
+                    ".byte 6670f-6669f\n"                          \
+                    ".byte 6671f-6670f\n"                          \
+                     ".byte 0\n"                                    \
+                    ".byte %c0\n"                                  \
+                    "6669:lock;addl $0,0(%%esp)\n"                 \
+                    "6670:" instr "\n"                             \
+                    "6671:\n"                                      \
+                    ".previous\n"                                  \
+                    :                                              \
+                    : "i" (X86_FEATURE_XMM2)                       \
+                    : "memory")
+#define smp_rmb() smp_alt_mb("lfence")
+#define smp_mb()  smp_alt_mb("mfence")
+#define set_mb(var, value) do {                                     \
+unsigned long __set_mb_temp;                                        \
+__asm__ __volatile__("6667:movl %1, %0\n6668:\n"                    \
+                    ".section __smp_alternatives,\"a\"\n"          \
+                    ".long 6667b\n"                                \
+                    ".long 6673f\n"                                \
+                    ".previous\n"                                  \
+                    ".section __smp_replacements,\"a\"\n"          \
+                    "6673: .byte 6668b-6667b\n"                    \
+                    ".byte 6670f-6669f\n"                          \
+                    ".byte 0\n"                                    \
+                    ".byte 6671f-6670f\n"                          \
+                    ".byte -1\n"                                   \
+                    "6669: xchg %1, %0\n"                          \
+                    "6670:movl %1, %0\n"                           \
+                    "6671:\n"                                      \
+                    ".previous\n"                                  \
+                    : "=m" (var), "=r" (__set_mb_temp)             \
+                    : "1" (value)                                  \
+                    : "memory"); } while (0)
+#else
+#define smp_rmb()      rmb()
+#define smp_mb()       mb()
 #define set_mb(var, value) do { xchg(&var, value); } while (0)
+#endif
+#define smp_read_barrier_depends()     read_barrier_depends()
 #else
 #define smp_mb()       barrier()
 #define smp_rmb()      barrier()
diff --git a/patches/linux-2.6.11/smp-alts.patch b/patches/linux-2.6.11/smp-alts.patch
new file mode 100644 (file)
index 0000000..db71ab1
--- /dev/null
@@ -0,0 +1,554 @@
+diff -Naur linux-2.6.11/arch/i386/Kconfig linux-2.6.11.post/arch/i386/Kconfig
+--- linux-2.6.11/arch/i386/Kconfig     2005-03-02 07:37:49.000000000 +0000
++++ linux-2.6.11.post/arch/i386/Kconfig        2005-06-10 13:42:35.000000000 +0100
+@@ -481,6 +481,19 @@
+         If you don't know what to do here, say N.
++config SMP_ALTERNATIVES
++      bool "SMP alternatives support (EXPERIMENTAL)"
++      depends on SMP && EXPERIMENTAL
++      help
++        Try to reduce the overhead of running an SMP kernel on a uniprocessor
++        host slightly by replacing certain key instruction sequences
++        according to whether we currently have more than one CPU available.
++        This should provide a noticeable boost to performance when
++        running SMP kernels on UP machines, and have negligible impact
++        when running on a true SMP host.
++
++          If unsure, say N.
++        
+ config NR_CPUS
+       int "Maximum number of CPUs (2-255)"
+       range 2 255
+diff -Naur linux-2.6.11/arch/i386/kernel/Makefile linux-2.6.11.post/arch/i386/kernel/Makefile
+--- linux-2.6.11/arch/i386/kernel/Makefile     2005-03-02 07:37:49.000000000 +0000
++++ linux-2.6.11.post/arch/i386/kernel/Makefile        2005-06-16 11:16:18.555332435 +0100
+@@ -32,6 +32,7 @@
+ obj-$(CONFIG_HPET_TIMER)      += time_hpet.o
+ obj-$(CONFIG_EFI)             += efi.o efi_stub.o
+ obj-$(CONFIG_EARLY_PRINTK)    += early_printk.o
++obj-$(CONFIG_SMP_ALTERNATIVES)  += smpalts.o
+ EXTRA_AFLAGS   := -traditional
+diff -Naur linux-2.6.11/arch/i386/kernel/smpalts.c linux-2.6.11.post/arch/i386/kernel/smpalts.c
+--- linux-2.6.11/arch/i386/kernel/smpalts.c    1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.11.post/arch/i386/kernel/smpalts.c       2005-06-16 11:23:39.300902424 +0100
+@@ -0,0 +1,76 @@
++#include <linux/kernel.h>
++#include <asm/system.h>
++#include <asm/smp_alt.h>
++#include <asm/processor.h>
++#include <asm/string.h>
++
++struct smp_replacement_record {
++      unsigned char targ_size;
++      unsigned char smp1_size;
++      unsigned char smp2_size;
++      unsigned char up_size;
++      unsigned char feature;
++      unsigned char data[0];
++};
++
++struct smp_alternative_record {
++      void *targ_start;
++      struct smp_replacement_record *repl;
++};
++
++extern struct smp_alternative_record __start_smp_alternatives_table,
++  __stop_smp_alternatives_table;
++
++void prepare_for_smp(void)
++{
++      struct smp_alternative_record *r;
++      printk(KERN_INFO "Enabling SMP...\n");
++      for (r = &__start_smp_alternatives_table;
++           r != &__stop_smp_alternatives_table;
++           r++) {
++              BUG_ON(r->repl->targ_size < r->repl->smp1_size);
++              BUG_ON(r->repl->targ_size < r->repl->smp2_size);
++              BUG_ON(r->repl->targ_size < r->repl->up_size);
++              if (r->repl->feature != (unsigned char)-1 &&
++                  boot_cpu_has(r->repl->feature)) {
++                      memcpy(r->targ_start,
++                             r->repl->data + r->repl->smp1_size,
++                             r->repl->smp2_size);
++                      memset(r->targ_start + r->repl->smp2_size,
++                             0x90,
++                             r->repl->targ_size - r->repl->smp2_size);
++              } else {
++                      memcpy(r->targ_start,
++                             r->repl->data,
++                             r->repl->smp1_size);
++                      memset(r->targ_start + r->repl->smp1_size,
++                             0x90,
++                             r->repl->targ_size - r->repl->smp1_size);
++              }
++      }
++      /* Paranoia */
++      asm volatile ("jmp 1f\n1:");
++      mb();
++}
++
++void unprepare_for_smp(void)
++{
++      struct smp_alternative_record *r;
++      printk(KERN_INFO "Disabling SMP...\n");
++      for (r = &__start_smp_alternatives_table;
++           r != &__stop_smp_alternatives_table;
++           r++) {
++              BUG_ON(r->repl->targ_size < r->repl->smp1_size);
++              BUG_ON(r->repl->targ_size < r->repl->smp2_size);
++              BUG_ON(r->repl->targ_size < r->repl->up_size);
++              memcpy(r->targ_start,
++                     r->repl->data + r->repl->smp1_size + r->repl->smp2_size,
++                     r->repl->up_size);
++              memset(r->targ_start + r->repl->up_size,
++                     0x90,
++                     r->repl->targ_size - r->repl->up_size);
++      }
++      /* Paranoia */
++      asm volatile ("jmp 1f\n1:");
++      mb();
++}
+diff -Naur linux-2.6.11/arch/i386/kernel/smpboot.c linux-2.6.11.post/arch/i386/kernel/smpboot.c
+--- linux-2.6.11/arch/i386/kernel/smpboot.c    2005-03-02 07:38:09.000000000 +0000
++++ linux-2.6.11.post/arch/i386/kernel/smpboot.c       2005-06-16 11:17:09.287064617 +0100
+@@ -1003,6 +1003,11 @@
+               if (max_cpus <= cpucount+1)
+                       continue;
++#ifdef CONFIG_SMP_ALTERNATIVES
++              if (kicked == 1)
++                      prepare_for_smp();
++#endif
++
+               if (do_boot_cpu(apicid))
+                       printk("CPU #%d not responding - cannot use it.\n",
+                                                               apicid);
+@@ -1118,6 +1123,11 @@
+               return -EIO;
+       }
++#ifdef CONFIG_SMP_ALTERNATIVES
++      if (num_online_cpus() == 1)
++              prepare_for_smp();
++#endif
++
+       local_irq_enable();
+       /* Unleash the CPU! */
+       cpu_set(cpu, smp_commenced_mask);
+diff -Naur linux-2.6.11/arch/i386/kernel/vmlinux.lds.S linux-2.6.11.post/arch/i386/kernel/vmlinux.lds.S
+--- linux-2.6.11/arch/i386/kernel/vmlinux.lds.S        2005-03-02 07:38:37.000000000 +0000
++++ linux-2.6.11.post/arch/i386/kernel/vmlinux.lds.S   2005-06-10 11:14:14.000000000 +0100
+@@ -30,6 +30,13 @@
+   __ex_table : { *(__ex_table) }
+   __stop___ex_table = .;
++  . = ALIGN(16);
++  __start_smp_alternatives_table = .;
++  __smp_alternatives : { *(__smp_alternatives) }
++  __stop_smp_alternatives_table = .;
++
++  __smp_replacements : { *(__smp_replacements) }
++
+   RODATA
+   /* writeable */
+diff -Naur linux-2.6.11/include/asm-i386/atomic.h linux-2.6.11.post/include/asm-i386/atomic.h
+--- linux-2.6.11/include/asm-i386/atomic.h     2005-03-02 07:37:51.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/atomic.h        2005-06-13 10:10:39.000000000 +0100
+@@ -4,18 +4,13 @@
+ #include <linux/config.h>
+ #include <linux/compiler.h>
+ #include <asm/processor.h>
++#include <asm/smp_alt.h>
+ /*
+  * Atomic operations that C can't guarantee us.  Useful for
+  * resource counting etc..
+  */
+-#ifdef CONFIG_SMP
+-#define LOCK "lock ; "
+-#else
+-#define LOCK ""
+-#endif
+-
+ /*
+  * Make sure gcc doesn't try to be clever and move things around
+  * on us. We need to use _exactly_ the address the user gave us,
+diff -Naur linux-2.6.11/include/asm-i386/bitops.h linux-2.6.11.post/include/asm-i386/bitops.h
+--- linux-2.6.11/include/asm-i386/bitops.h     2005-03-02 07:38:12.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/bitops.h        2005-06-13 10:11:54.000000000 +0100
+@@ -7,6 +7,7 @@
+ #include <linux/config.h>
+ #include <linux/compiler.h>
++#include <asm/smp_alt.h>
+ /*
+  * These have to be done with inline assembly: that way the bit-setting
+@@ -16,12 +17,6 @@
+  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+  */
+-#ifdef CONFIG_SMP
+-#define LOCK_PREFIX "lock ; "
+-#else
+-#define LOCK_PREFIX ""
+-#endif
+-
+ #define ADDR (*(volatile long *) addr)
+ /**
+@@ -41,7 +36,7 @@
+  */
+ static inline void set_bit(int nr, volatile unsigned long * addr)
+ {
+-      __asm__ __volatile__( LOCK_PREFIX
++      __asm__ __volatile__( LOCK
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+@@ -76,7 +71,7 @@
+  */
+ static inline void clear_bit(int nr, volatile unsigned long * addr)
+ {
+-      __asm__ __volatile__( LOCK_PREFIX
++      __asm__ __volatile__( LOCK
+               "btrl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+@@ -121,7 +116,7 @@
+  */
+ static inline void change_bit(int nr, volatile unsigned long * addr)
+ {
+-      __asm__ __volatile__( LOCK_PREFIX
++      __asm__ __volatile__( LOCK
+               "btcl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+@@ -140,7 +135,7 @@
+ {
+       int oldbit;
+-      __asm__ __volatile__( LOCK_PREFIX
++      __asm__ __volatile__( LOCK
+               "btsl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+@@ -180,7 +175,7 @@
+ {
+       int oldbit;
+-      __asm__ __volatile__( LOCK_PREFIX
++      __asm__ __volatile__( LOCK
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+@@ -231,7 +226,7 @@
+ {
+       int oldbit;
+-      __asm__ __volatile__( LOCK_PREFIX
++      __asm__ __volatile__( LOCK
+               "btcl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+diff -Naur linux-2.6.11/include/asm-i386/rwsem.h linux-2.6.11.post/include/asm-i386/rwsem.h
+--- linux-2.6.11/include/asm-i386/rwsem.h      2005-03-02 07:38:08.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/rwsem.h 2005-06-13 10:13:06.000000000 +0100
+@@ -40,6 +40,7 @@
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
++#include <asm/smp_alt.h>
+ struct rwsem_waiter;
+@@ -99,7 +100,7 @@
+ {
+       __asm__ __volatile__(
+               "# beginning down_read\n\t"
+-LOCK_PREFIX   "  incl      (%%eax)\n\t" /* adds 0x00000001, returns the old value */
++LOCK          "  incl      (%%eax)\n\t" /* adds 0x00000001, returns the old value */
+               "  js        2f\n\t" /* jump if we weren't granted the lock */
+               "1:\n\t"
+               LOCK_SECTION_START("")
+@@ -130,7 +131,7 @@
+               "  movl      %1,%2\n\t"
+               "  addl      %3,%2\n\t"
+               "  jle       2f\n\t"
+-LOCK_PREFIX   "  cmpxchgl  %2,%0\n\t"
++LOCK          "  cmpxchgl  %2,%0\n\t"
+               "  jnz       1b\n\t"
+               "2:\n\t"
+               "# ending __down_read_trylock\n\t"
+@@ -150,7 +151,7 @@
+       tmp = RWSEM_ACTIVE_WRITE_BIAS;
+       __asm__ __volatile__(
+               "# beginning down_write\n\t"
+-LOCK_PREFIX   "  xadd      %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
++LOCK          "  xadd      %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
+               "  testl     %%edx,%%edx\n\t" /* was the count 0 before? */
+               "  jnz       2f\n\t" /* jump if we weren't granted the lock */
+               "1:\n\t"
+@@ -188,7 +189,7 @@
+       __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
+       __asm__ __volatile__(
+               "# beginning __up_read\n\t"
+-LOCK_PREFIX   "  xadd      %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
++LOCK          "  xadd      %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
+               "  js        2f\n\t" /* jump if the lock is being waited upon */
+               "1:\n\t"
+               LOCK_SECTION_START("")
+@@ -214,7 +215,7 @@
+       __asm__ __volatile__(
+               "# beginning __up_write\n\t"
+               "  movl      %2,%%edx\n\t"
+-LOCK_PREFIX   "  xaddl     %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
++LOCK          "  xaddl     %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
+               "  jnz       2f\n\t" /* jump if the lock is being waited upon */
+               "1:\n\t"
+               LOCK_SECTION_START("")
+@@ -239,7 +240,7 @@
+ {
+       __asm__ __volatile__(
+               "# beginning __downgrade_write\n\t"
+-LOCK_PREFIX   "  addl      %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
++LOCK          "  addl      %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
+               "  js        2f\n\t" /* jump if the lock is being waited upon */
+               "1:\n\t"
+               LOCK_SECTION_START("")
+@@ -263,7 +264,7 @@
+ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+ {
+       __asm__ __volatile__(
+-LOCK_PREFIX   "addl %1,%0"
++LOCK            "addl %1,%0"
+               : "=m"(sem->count)
+               : "ir"(delta), "m"(sem->count));
+ }
+@@ -276,7 +277,7 @@
+       int tmp = delta;
+       __asm__ __volatile__(
+-LOCK_PREFIX   "xadd %0,(%2)"
++LOCK                    "xadd %0,(%2)"
+               : "+r"(tmp), "=m"(sem->count)
+               : "r"(sem), "m"(sem->count)
+               : "memory");
+diff -Naur linux-2.6.11/include/asm-i386/smp_alt.h linux-2.6.11.post/include/asm-i386/smp_alt.h
+--- linux-2.6.11/include/asm-i386/smp_alt.h    1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.11.post/include/asm-i386/smp_alt.h       2005-06-16 11:16:50.109433206 +0100
+@@ -0,0 +1,32 @@
++#ifndef __ASM_SMP_ALT_H__
++#define __ASM_SMP_ALT_H__
++
++#include <linux/config.h>
++
++#ifdef CONFIG_SMP
++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
++#define LOCK \
++        "6677: nop\n" \
++      ".section __smp_alternatives,\"a\"\n" \
++      ".long 6677b\n" \
++      ".long 6678f\n" \
++      ".previous\n" \
++      ".section __smp_replacements,\"a\"\n" \
++      "6678: .byte 1\n" \
++      ".byte 1\n" \
++      ".byte 0\n" \
++        ".byte 1\n" \
++      ".byte -1\n" \
++      "lock\n" \
++      "nop\n" \
++      ".previous\n"
++void prepare_for_smp(void);
++void unprepare_for_smp(void);
++#else
++#define LOCK "lock ; "
++#endif
++#else
++#define LOCK ""
++#endif
++
++#endif /* __ASM_SMP_ALT_H__ */
+diff -Naur linux-2.6.11/include/asm-i386/spinlock.h linux-2.6.11.post/include/asm-i386/spinlock.h
+--- linux-2.6.11/include/asm-i386/spinlock.h   2005-03-02 07:37:50.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/spinlock.h      2005-06-13 14:13:52.000000000 +0100
+@@ -6,6 +6,7 @@
+ #include <asm/page.h>
+ #include <linux/config.h>
+ #include <linux/compiler.h>
++#include <asm/smp_alt.h>
+ asmlinkage int printk(const char * fmt, ...)
+       __attribute__ ((format (printf, 1, 2)));
+@@ -47,8 +48,9 @@
+ #define spin_unlock_wait(x)   do { barrier(); } while(spin_is_locked(x))
+ #define spin_lock_string \
+-      "\n1:\t" \
+-      "lock ; decb %0\n\t" \
++        "1:\n" \
++      LOCK \
++      "decb %0\n\t" \
+       "jns 3f\n" \
+       "2:\t" \
+       "rep;nop\n\t" \
+@@ -58,8 +60,9 @@
+       "3:\n\t"
+ #define spin_lock_string_flags \
+-      "\n1:\t" \
+-      "lock ; decb %0\n\t" \
++        "1:\n" \
++      LOCK \
++      "decb %0\n\t" \
+       "jns 4f\n\t" \
+       "2:\t" \
+       "testl $0x200, %1\n\t" \
+@@ -121,10 +124,34 @@
+ static inline int _raw_spin_trylock(spinlock_t *lock)
+ {
+       char oldval;
++#ifdef CONFIG_SMP_ALTERNATIVES
+       __asm__ __volatile__(
+-              "xchgb %b0,%1"
++              "1:movb %1,%b0\n"
++              "movb $0,%1\n"
++              "2:"
++              ".section __smp_alternatives,\"a\"\n"
++              ".long 1b\n"
++              ".long 3f\n"
++              ".previous\n"
++              ".section __smp_replacements,\"a\"\n"
++              "3: .byte 2b - 1b\n"
++              ".byte 5f-4f\n"
++              ".byte 0\n"
++              ".byte 6f-5f\n"
++              ".byte -1\n"
++              "4: xchgb %b0,%1\n"
++              "5: movb %1,%b0\n"
++              "movb $0,%1\n"
++              "6:\n"
++              ".previous\n"
+               :"=q" (oldval), "=m" (lock->slock)
+               :"0" (0) : "memory");
++#else
++      __asm__ __volatile__(
++              "xchgb %b0,%1\n"
++              :"=q" (oldval), "=m" (lock->slock)
++              :"0" (0) : "memory");
++#endif
+       return oldval > 0;
+ }
+@@ -225,8 +252,8 @@
+       __build_write_lock(rw, "__write_lock_failed");
+ }
+-#define _raw_read_unlock(rw)          asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+-#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
++#define _raw_read_unlock(rw)  asm volatile(LOCK "incl %0" :"=m" ((rw)->lock) : : "memory")
++#define _raw_write_unlock(rw) asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+ static inline int _raw_read_trylock(rwlock_t *lock)
+ {
+diff -Naur linux-2.6.11/include/asm-i386/system.h linux-2.6.11.post/include/asm-i386/system.h
+--- linux-2.6.11/include/asm-i386/system.h     2005-03-02 07:37:30.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/system.h        2005-06-15 13:21:40.000000000 +0100
+@@ -5,7 +5,7 @@
+ #include <linux/kernel.h>
+ #include <asm/segment.h>
+ #include <asm/cpufeature.h>
+-#include <linux/bitops.h> /* for LOCK_PREFIX */
++#include <asm/smp_alt.h>
+ #ifdef __KERNEL__
+@@ -249,19 +249,19 @@
+       unsigned long prev;
+       switch (size) {
+       case 1:
+-              __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
++              __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 2:
+-              __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
++              __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 4:
+-              __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
++              __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+@@ -425,11 +425,55 @@
+ #endif
+ #ifdef CONFIG_SMP
+-#define smp_mb()      mb()
+-#define smp_rmb()     rmb()
+ #define smp_wmb()     wmb()
+-#define smp_read_barrier_depends()    read_barrier_depends()
++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
++#define smp_alt_mb(instr)                                           \
++__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
++                   ".section __smp_alternatives,\"a\"\n"          \
++                   ".long 6667b\n"                                \
++                     ".long 6673f\n"                                \
++                   ".previous\n"                                  \
++                   ".section __smp_replacements,\"a\"\n"          \
++                   "6673:.byte 6668b-6667b\n"                     \
++                   ".byte 6670f-6669f\n"                          \
++                   ".byte 6671f-6670f\n"                          \
++                     ".byte 0\n"                                    \
++                   ".byte %c0\n"                                  \
++                   "6669:lock;addl $0,0(%%esp)\n"                 \
++                   "6670:" instr "\n"                             \
++                   "6671:\n"                                      \
++                   ".previous\n"                                  \
++                   :                                              \
++                   : "i" (X86_FEATURE_XMM2)                       \
++                   : "memory")
++#define smp_rmb() smp_alt_mb("lfence")
++#define smp_mb()  smp_alt_mb("mfence")
++#define set_mb(var, value) do {                                     \
++unsigned long __set_mb_temp;                                        \
++__asm__ __volatile__("6667:movl %1, %0\n6668:\n"                    \
++                   ".section __smp_alternatives,\"a\"\n"          \
++                   ".long 6667b\n"                                \
++                   ".long 6673f\n"                                \
++                   ".previous\n"                                  \
++                   ".section __smp_replacements,\"a\"\n"          \
++                   "6673: .byte 6668b-6667b\n"                    \
++                   ".byte 6670f-6669f\n"                          \
++                   ".byte 0\n"                                    \
++                   ".byte 6671f-6670f\n"                          \
++                   ".byte -1\n"                                   \
++                   "6669: xchg %1, %0\n"                          \
++                   "6670:movl %1, %0\n"                           \
++                   "6671:\n"                                      \
++                   ".previous\n"                                  \
++                   : "=m" (var), "=r" (__set_mb_temp)             \
++                   : "1" (value)                                  \
++                   : "memory"); } while (0)
++#else
++#define smp_rmb()     rmb()
++#define smp_mb()      mb()
+ #define set_mb(var, value) do { xchg(&var, value); } while (0)
++#endif
++#define smp_read_barrier_depends()    read_barrier_depends()
+ #else
+ #define smp_mb()      barrier()
+ #define smp_rmb()     barrier()